library(ggplot2)
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(spatialreg)
Loading required package: spData
To access larger datasets in this package, install the spDataLarge package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
Loading required package: Matrix
library(spdep)
Attaching package: ‘spdep’
The following objects are masked from ‘package:spatialreg’:
get.ClusterOption, get.coresOption, get.mcOption, get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption, set.coresOption,
set.mcOption, set.VerboseOption, set.ZeroPolicyOption
rm(list=ls())
mypath <- "./DataBackups/"
profession_names <- c(
"sales professions",
"mechatronics, energy, and electrical professions",
"professions in business management and organization",
"medical health professions",
"machine and vehicle technology professions",
"all professions"
)
kreise <- sf::st_read("./DataPreparation/Shapes/Kreise/vg1000_12-31.utm32s.shape.ebenen/vg1000_ebenen_1231/VG1000_KRS.shp") # though not imported directly, all other auxiliary files must be present in the path
Reading layer `VG1000_KRS' from data source
`/home/dennis/Desktop/Spatial Regression Analysis_cleaned/DataPreparation/Shapes/Kreise/vg1000_12-31.utm32s.shape.ebenen/vg1000_ebenen_1231/VG1000_KRS.shp'
using driver `ESRI Shapefile'
Simple feature collection with 424 features and 25 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 280353.1 ymin: 5235878 xmax: 921261.6 ymax: 6101482
Projected CRS: ETRS89 / UTM zone 32N
kreise <- st_transform(kreise, "WGS84")
kreise_merged <- kreise %>%
group_by(SN_L) %>%
summarize(geometry = st_union(geometry))
library(stringr)
library(ggrepel)
model_list <- list()
graphics_list <- list()
spatial_clustering_list <- list()
load(file = "./DataBackups/weights_data.RData")
for (p in profession_names)
{
#p <- profession_names[1]
load(paste0(mypath,p,"M.Rdata"))
load(file = "./DataBackups/weights_data.RData")
print(p)
target = "M"
formula_base <- as.formula(paste0(target," ~ U + V "))
formula <- as.formula(paste0(target," ~ U + V + unemp25 + cons + highSE + medSE "))
formula_pub_num <- as.formula(paste0(target," ~ U + V + unemp25 + cons + highSE + medSE + Agglomeration"))
target = "M"
profession_new$Agglomeration <- profession_new$AgglomerationPublicPop49
# Begin Weight Matrix computation
columns_of_interest <- c('M','U', 'V', 'unemp25', 'cons', 'lowSE', 'medSE', 'Agglomeration')
# Replace Inf and -Inf with NA in the specified columns
profession_new[columns_of_interest] <- lapply(profession_new[columns_of_interest], function(x) replace(x, is.infinite(x), NA))
# Remove rows with any NA values in the specified columns
profession_new <- profession_new[complete.cases(profession_new[columns_of_interest]), ]
best_Weights_public_pop <- best_Weights_public_pop_mat[colnames(best_Weights_public_pop_mat) %in% profession_new$region,colnames(best_Weights_public_pop_mat) %in% profession_new$region]
################
library(dplyr)
distances_relevant_pop <- merge(distances_relevant,final, by.x="city...1",by.y="city")
distances_relevant_pop <- merge(distances_relevant_pop,final,by.x="city...4",by.y="city")
profession_new <- profession_new[!is.na(profession_new$OpenPositions),]
## As all Professions have different Regions, we need to Reduce the matrix to relevant regions. As the initial matrix, already does not contain all regions, it is easier to re-calculate evrything.
distances_relevant_test_public <- distances_relevant_pop %>% group_by(ABDistrict...6) %>% mutate(district_pop = sum(unique(maxPopulation.y))) %>% ungroup %>% filter(ABDistrict...3 != ABDistrict...6) %>% filter(public <= 51.28167) %>% group_by(city...1, ABDistrict...3, ABDistrict...6) %>% summarise(district_pop = max(district_pop), count= sum(unique(maxPopulation.y)), Weight=count/district_pop, .groups="keep") %>% group_by(ABDistrict...3, ABDistrict...6) %>% summarise(Weight=mean(Weight), .groups="keep")
Weights_public_pop <- matrix(0, nrow=nrow(profession_new), ncol=nrow(profession_new))
row.names(Weights_public_pop)<- profession_new$region
colnames(Weights_public_pop) <- profession_new$region
for(i in 1:nrow(distances_relevant_test_public))
{
Weights_public_pop[row.names(Weights_public_pop)==distances_relevant_test_public$ABDistrict...3[i], colnames(Weights_public_pop)==distances_relevant_test_public$ABDistrict...6[i]]<- distances_relevant_test_public$Weight[i]
}
Weights_public_pop <- Weights_public_pop/max(Weights_public_pop)
model <- lmSLX(formula_pub_num , profession_new, mat2listw(Weights_public_pop, style="M"), na.action=na.omit, zero.policy=TRUE, Durbin=~ U + V -1)
model_list$x <- model
names(model_list)<-c(names(model_list)[1:(length(names(model_list)))-1], p)
#
estimate_vector<- model$coefficients[-c(2,3,9:10)]
estimate_values <- as.matrix(cbind("(Intercept)"=1,st_drop_geometry(profession_new[,names(estimate_vector[-c(1)])])))
profession_new$Efficiency <- exp(estimate_values %*% estimate_vector)
map_efficiency <- ggplot() +
geom_sf(data=districts, fill = "#e4e4e4") +
geom_sf(data=st_as_sf(profession_new), aes(fill = Efficiency))+
geom_sf(data=kreise_merged,fill=NA,
color = "#ec7063",
lwd = 1.2) +
theme(legend.text.align = 1) +
labs(fill = str_wrap(paste0("Efficiency for ", p), width = 50)) +
guides(
fill = guide_colorbar(direction = "horizontal", title.hjust=0.5, title.position="top", barwidth = 20, # Adjust this value to change the width of the colorbar
barheight = 1 ),
# Adjust rows for the fill legend
) +
theme(
legend.position = "bottom",
legend.text = element_text(size = 18),
legend.title = element_text(size = 18, face="bold"),
legend.box = "horizontal",
legend.box.just = "center",
legend.spacing.x = unit(0.5, "cm")
)
map_efficiency <- map_efficiency + scale_fill_gradientn(colors = custom_colors(100), na.value="#e4e4e4")
print(map_efficiency)
graphics_list$x <- map_efficiency
names(graphics_list)<-names(model_list)
}
[1] "sales professions"
Warning: style is M (missing); style should be set to a valid valueWarning: missing spatial weights styleWarning: The `legend.text.align` argument of `theme()` is deprecated as of ggplot2 3.5.0.
Please use theme(legend.text = element_text(hjust)) instead.
[1] "mechatronics, energy, and electrical professions"
[1] "professions in business management and organization"
[1] "medical health professions"
[1] "machine and vehicle technology professions"
[1] "all professions"






library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
combined_plot <- do.call(grid.arrange, c(graphics_list, ncol = 2, nrow = 3))

print(combined_plot)
TableGrob (3 x 2) "arrange": 6 grobs
#combined_plot <- do.call(grid.arrange, c(spatial_clustering_list, ncol = 2, nrow = 3))
#print(combined_plot)
library(jtools)
library(huxtable)
Attaching package: ‘huxtable’
The following object is masked from ‘package:dplyr’:
add_rownames
The following object is masked from ‘package:ggplot2’:
theme_grey
# Combine model summaries into a single huxtable
ht <- export_summs(model_list$`all professions`,
model_list$`machine and vehicle technology professions`,
model_list$`mechatronics, energy, and electrical professions`,
model_list$`medical health professions`,
model_list$`professions in business management and organization`,
model_list$`sales professions`,
model.names = c("All Professions",
"Machine & Vehicle Technology",
"Mechatronics, Energy, Electrical",
"Medical Health",
"Business Management",
"Sales"),
number_format = "%.3f",
statistics = c("logLik","AIC","BIC","N. obs." = "nobs"),
stars = c(`***` = 0.001, `**` = 0.01, `*` = 0.05, `^` = 0.1))
Warning: The `tidy()` method for objects of class `SlX` is not maintained by the broom team, and is only supported through the `lm` tidier method. Please be cautious in interpreting and reporting broom output.
# Apply any additional formatting to the huxtable if needed
ht <- ht %>%
set_header_rows(1, TRUE) %>%
set_header_cols(1, TRUE)
# Display the huxtable
print_screen(ht)
─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
All Professions Machine & Vehicle Mechatronics, Medical Health Business Management Sales
Technology Energy, Electrical
─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
(Intercept) -0.509 * -0.324 -0.482 * -0.916 ** -0.551 * -0.944 **
(0.207) (0.218) (0.196) (0.338) (0.225) (0.313)
U 0.681 *** 0.508 *** 0.694 *** 0.566 *** 0.500 *** 0.691 ***
(0.039) (0.041) (0.038) (0.052) (0.046) (0.034)
V 0.316 *** 0.503 *** 0.320 *** 0.446 *** 0.519 *** 0.319 ***
(0.039) (0.042) (0.038) (0.050) (0.045) (0.033)
unemp25 -0.014 -0.006 -0.021 * 0.017 -0.008 0.005
(0.008) (0.009) (0.008) (0.016) (0.010) (0.013)
cons 0.051 * 0.002 0.065 * 0.127 ** 0.028 0.034
(0.025) (0.027) (0.025) (0.042) (0.029) (0.036)
highSE -0.002 0.005 0.017 -0.002 0.007 0.034
(0.024) (0.027) (0.024) (0.039) (0.028) (0.039)
medSE 0.044 0.042 0.007 0.033 0.050 0.092 ^
(0.030) (0.033) (0.029) (0.048) (0.034) (0.047)
Agglomeration 0.019 *** -0.003 0.007 0.028 ** 0.006 0.021 **
(0.005) (0.005) (0.005) (0.009) (0.007) (0.008)
lag.U -0.070 * -0.084 ** -0.036 0.095 -0.015 0.005
(0.031) (0.032) (0.033) (0.059) (0.041) (0.029)
lag.V 0.069 * 0.086 ** 0.035 -0.095 0.015 -0.006
(0.032) (0.033) (0.034) (0.059) (0.042) (0.029)
─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
logLik 299.984 276.552 276.644 193.812 247.251 232.410
AIC -577.969 -531.104 -531.288 -365.623 -472.503 -442.819
BIC -545.149 -498.825 -499.412 -335.053 -441.303 -410.228
N. obs. 146 139 134 119 126 143
─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
*** p < 0.001; ** p < 0.01; * p < 0.05; ^ p < 0.1.
Column names: names, All Professions, Machine & Vehicle Technology, Mechatronics, Energy, Electrical, Medical Health, Business Management, Sales
# Export to HTML
quick_html(ht, file = "./Outputs/Regressions/Profession_differences.html")
kf.service.services: KApplicationTrader: mimeType "x-scheme-handler/file" not found
kf.service.services: KApplicationTrader: mimeType "x-scheme-handler/file" not found
library(ggplot2)
library(dplyr)
library(tidyr)
Attaching package: ‘tidyr’
The following objects are masked from ‘package:Matrix’:
expand, pack, unpack
library(stringr)
library(ggplot2)
# Extract coefficients and create a data frame
# Extract coefficients and create a data frame
# Extract coefficients, standard errors and create a data frame
coef_df <- do.call(rbind, lapply(names(model_list), function(profession) {
model <- model_list[[profession]]
tidy_model <- broom::tidy(model)
tidy_model$Profession <- profession
tidy_model
}))
Opening in existing browser session.
# Function to add line breaks to labels
add_line_breaks <- function(label, width = 30) {
str_wrap(label, width = width)
}
#Extract coefficients, standard errors and create a data frame
coef_df <- do.call(rbind, lapply(names(model_list), function(profession) {
model <- model_list[[profession]]
tidy_model <- broom::tidy(model)
tidy_model$Profession <- profession
tidy_model
}))
# Create an offset variable for the y-axis
coef_df <- coef_df %>%
group_by(term) %>%
mutate(offset = as.numeric(factor(Profession)) * 0.1) %>% # Adjust the offset multiplier as needed
ungroup()
# Add the English profession column
coef_df$term[coef_df$term=="(Intercept)"] <- "Efficiency"
coef_df$term[coef_df$term=="AgglomerationDistPop"] <- "Agglomeration"
coef_df$term[coef_df$term=="lag.U"] <- "U.lag"
coef_df$term[coef_df$term=="lag.V"] <- "V.lag"
coef_df1 <- coef_df[coef_df$term %in% c("U","V","U.lag","V.lag","Efficiency", "lambda"),]
coef_df2 <- coef_df[!(coef_df$term %in% c("U","V","U.lag","V.lag","Efficiency","lambda")),]
library(viridis)
Loading required package: viridisLite
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:viridis’:
viridis_pal
The following object is masked from ‘package:huxtable’:
number_format
dark_viridis_colors <- viridis_pal(option = "G")(10)[3:8]
# Plot the coefficients with error bars and vertical offset
coef_df1$term <- factor(coef_df1$term, levels = c("lambda", "U.lag","V.lag", "U", "V", "Efficiency" ))
plot_eff1 <- ggplot(coef_df1, aes(x = term, y = estimate, color = Profession))+
geom_point(position = position_dodge(width = 0.8), size = 3) +
geom_errorbar(aes(ymin = estimate - 1.96 * std.error, ymax = estimate + 1.96 *std.error),
width = 0.2, size = 1, position = position_dodge(width = 0.8))+
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = 6) +
coord_flip() +
theme_minimal() +
labs(
x = "Coefficient Term",
y = "Estimate",
color = "Profession") +
theme(axis.text.x = element_text(size = 16),
axis.text.y = element_text(size = 16),
legend.position = "bottom", legend.text = element_text(size = 14),
plot.margin = margin(t = 10, r = 40, b = 10, l = 10),
legend.title = element_text(size = 16),
legend.title.position = "top",
legend.box = "horizontal",
# Adjust margins (top, right, bottom, left)
panel.grid.major = element_line(color = "darkgray", size = 0.25), # Darker major grid lines
panel.grid.minor = element_line(color = "gray", size = 0.25),
axis.title = element_text(size = 16),
plot.title = element_text(size = 20)) +
guides(color = guide_legend(title.position = "top", nrow = 6, byrow = TRUE)) +
# scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)),limits = c(-0.2, 0.2)) +
scale_x_discrete(labels = function(x) sapply(x, add_line_breaks))+
scale_color_manual(values=custom_disc_color) + guides(color = guide_legend(ncol = 2))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
Please use the `linewidth` argument instead.
plot_eff2 <- ggplot(coef_df2, aes(color = Profession, y = estimate, x= term)) +
geom_point(position = position_dodge(width = 0.8), size = 3) +
geom_errorbar(aes(ymin = estimate - 1.96 *std.error, ymax = estimate + 1.96 *std.error),
width = 0.2, size = 1, position = position_dodge(width = 0.8)) +
geom_vline(xintercept = 0, linetype = "solid", color = "black", size = 1.2)+
coord_flip() +
theme_minimal() +
labs(
x = "Coefficient Term",
y = "Estimate",
color = "Profession") +
theme(axis.text.x = element_text(size = 16),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16),
# Adjust margins (top, right, bottom, left)
panel.grid.major = element_line(color = "darkgray", size = 0.25), # Darker major grid lines
panel.grid.minor = element_line(color = "gray", size = 0.25),
plot.title = element_text(size = 20),
legend.position = "bottom", legend.text = element_text(size = 14),
plot.margin = margin(t = 10, r = 40, b = 10, l = 10),
legend.title = element_text(size = 16)) +
guides(color = guide_legend(title.position = "top", nrow = 6, byrow = TRUE)) +
scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)),limits = c(-0.21, 0.21)) +
scale_x_discrete(labels = function(x) sapply(x, add_line_breaks))+
scale_color_manual(values=custom_disc_color)
plot_eff1

library(ggplot2)
library(ggpubr)
Attaching package: ‘ggpubr’
The following object is masked from ‘package:huxtable’:
font
# Arrange with shared legend
combined <- ggarrange(
plot_eff1, plot_eff2,
ncol = 2, nrow = 1,
common.legend = TRUE, # <- share legend
legend = "bottom" # <- put legend below
)
# Save as PNG
ggsave("./Outputs/Figures/plots_combined.png", combined,
width = 12, height = 12, units = "in", dpi = 600)

---
title: "R Notebook"
output: html_notebook
---
 


```{r fig.height=12, fig.width=16}

library(ggplot2)
library(sf)
library(dplyr)
library(spatialreg)
library(spdep)

rm(list=ls())
mypath <- "./DataBackups/"
profession_names <- c(
  "sales professions",
  "mechatronics, energy, and electrical professions",
  "professions in business management and organization",
  "medical health professions",
  "machine and vehicle technology professions",
  "all professions"
)

kreise <- sf::st_read("./DataPreparation/Shapes/Kreise/vg1000_12-31.utm32s.shape.ebenen/vg1000_ebenen_1231/VG1000_KRS.shp") # though not imported directly, all other auxiliary files must be present in the path
kreise <- st_transform(kreise, "WGS84")
kreise_merged <- kreise %>%
  group_by(SN_L) %>%
  summarize(geometry = st_union(geometry))
```

```{r fig.height=12, fig.width=16}
library(stringr)
library(ggrepel)


model_list <- list()

graphics_list <- list()
spatial_clustering_list <- list()
load(file = "./DataBackups/weights_data.RData")

for (p in profession_names)
{
#p <- profession_names[1]  
  
load(paste0(mypath,p,"M.Rdata"))
load(file = "./DataBackups/weights_data.RData")

print(p)

target = "M"
formula_base <- as.formula(paste0(target,"  ~ U + V "))
formula <- as.formula(paste0(target,"   ~ U + V + unemp25 + cons + highSE +  medSE  "))
formula_pub_num <- as.formula(paste0(target,"   ~ U + V + unemp25 + cons + highSE + medSE  + Agglomeration"))
target = "M"



profession_new$Agglomeration <- profession_new$AgglomerationPublicPop49


# Begin Weight Matrix computation
columns_of_interest <- c('M','U', 'V', 'unemp25', 'cons', 'lowSE', 'medSE', 'Agglomeration')
# Replace Inf and -Inf with NA in the specified columns
profession_new[columns_of_interest] <- lapply(profession_new[columns_of_interest], function(x) replace(x, is.infinite(x), NA))
# Remove rows with any NA values in the specified columns
profession_new <- profession_new[complete.cases(profession_new[columns_of_interest]), ]

best_Weights_public_pop <- best_Weights_public_pop_mat[colnames(best_Weights_public_pop_mat) %in% profession_new$region,colnames(best_Weights_public_pop_mat) %in% profession_new$region]

################
library(dplyr)

distances_relevant_pop <- merge(distances_relevant,final, by.x="city...1",by.y="city")
distances_relevant_pop <- merge(distances_relevant_pop,final,by.x="city...4",by.y="city")

profession_new <- profession_new[!is.na(profession_new$OpenPositions),]

## As all Professions have different Regions, we need to Reduce the matrix to relevant regions. As the initial matrix, already does not contain all regions, it is easier to re-calculate evrything.
distances_relevant_test_public <-  distances_relevant_pop %>% group_by(ABDistrict...6) %>% mutate(district_pop = sum(unique(maxPopulation.y))) %>% ungroup %>% filter(ABDistrict...3 != ABDistrict...6)  %>%  filter(public <= 51.28167) %>% group_by(city...1, ABDistrict...3, ABDistrict...6) %>%  summarise(district_pop = max(district_pop), count= sum(unique(maxPopulation.y)), Weight=count/district_pop, .groups="keep") %>% group_by(ABDistrict...3, ABDistrict...6) %>% summarise(Weight=mean(Weight), .groups="keep")


Weights_public_pop  <- matrix(0, nrow=nrow(profession_new), ncol=nrow(profession_new))
  row.names(Weights_public_pop)<- profession_new$region
  colnames(Weights_public_pop) <- profession_new$region

  for(i in 1:nrow(distances_relevant_test_public))
  {
    Weights_public_pop[row.names(Weights_public_pop)==distances_relevant_test_public$ABDistrict...3[i], colnames(Weights_public_pop)==distances_relevant_test_public$ABDistrict...6[i]]<- distances_relevant_test_public$Weight[i]
  }
  
  Weights_public_pop <- Weights_public_pop/max(Weights_public_pop)

  model <- lmSLX(formula_pub_num , profession_new, mat2listw(Weights_public_pop, style="M"), na.action=na.omit, zero.policy=TRUE, Durbin=~  U + V  -1)

  model_list$x <- model
  names(model_list)<-c(names(model_list)[1:(length(names(model_list)))-1], p)
  
  #
  estimate_vector<- model$coefficients[-c(2,3,9:10)]
  estimate_values <- as.matrix(cbind("(Intercept)"=1,st_drop_geometry(profession_new[,names(estimate_vector[-c(1)])])))
  
  
  profession_new$Efficiency <- exp(estimate_values %*% estimate_vector) 

map_efficiency <- ggplot() + 
  geom_sf(data=districts, fill = "#e4e4e4") +
  geom_sf(data=st_as_sf(profession_new), aes(fill = Efficiency))+
  geom_sf(data=kreise_merged,fill=NA,
          color = "#ec7063",   
          lwd = 1.2) +
  theme(legend.text.align = 1)  +
  labs(fill = str_wrap(paste0("Efficiency for ", p), width = 50)) + 
  guides(
    fill = guide_colorbar(direction = "horizontal", title.hjust=0.5, title.position="top", barwidth = 20,  # Adjust this value to change the width of the colorbar
      barheight = 1  ), 
# Adjust rows for the fill legend
  ) +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 18),
    legend.title = element_text(size = 18, face="bold"),
    legend.box = "horizontal",
    legend.box.just = "center",
    legend.spacing.x = unit(0.5, "cm") 
  )


map_efficiency <- map_efficiency +  scale_fill_gradientn(colors = custom_colors(100), na.value="#e4e4e4")
print(map_efficiency)


  graphics_list$x <- map_efficiency
  names(graphics_list)<-names(model_list)

}

```



```{r fig.height=24, fig.width=16}

library(gridExtra)
combined_plot <- do.call(grid.arrange, c(graphics_list, ncol = 2, nrow = 3))
print(combined_plot)


#combined_plot <- do.call(grid.arrange, c(spatial_clustering_list, ncol = 2, nrow = 3))
#print(combined_plot)


```

```{r}
library(jtools)
library(huxtable)
# Combine model summaries into a single huxtable
ht <- export_summs(model_list$`all professions`,
                   model_list$`machine and vehicle technology professions`,
                   model_list$`mechatronics, energy, and electrical professions`,
                   model_list$`medical health professions`,
                   model_list$`professions in business management and organization`,
                   model_list$`sales professions`,
                   model.names = c("All Professions",
                                   "Machine & Vehicle Technology",
                                   "Mechatronics, Energy, Electrical",
                                   "Medical Health",
                                   "Business Management",
                                   "Sales"), 
  number_format = "%.3f", 
  statistics = c("logLik","AIC","BIC","N. obs." = "nobs"),
  stars = c(`***` = 0.001, `**` = 0.01, `*` = 0.05, `^` = 0.1))

# Apply any additional formatting to the huxtable if needed
ht <- ht %>%
  
  set_header_rows(1, TRUE) %>%
  set_header_cols(1, TRUE)

# Display the huxtable
print_screen(ht)

# Export to HTML
quick_html(ht, file = "./Outputs/Regressions/Profession_differences.html")



```



```{R fig.height=10, fig.width=7}

library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)



# Extract coefficients and create a data frame
# Extract coefficients and create a data frame
# Extract coefficients, standard errors and create a data frame
coef_df <- do.call(rbind, lapply(names(model_list), function(profession) {
  model <- model_list[[profession]]
  tidy_model <- broom::tidy(model)
  tidy_model$Profession <- profession
  tidy_model
}))


# Function to add line breaks to labels
add_line_breaks <- function(label, width = 30) {
  str_wrap(label, width = width)
}



 #Extract coefficients, standard errors and create a data frame
coef_df <- do.call(rbind, lapply(names(model_list), function(profession) {
  model <- model_list[[profession]]
  tidy_model <- broom::tidy(model)
  tidy_model$Profession <- profession
  tidy_model
}))

# Create an offset variable for the y-axis
coef_df <- coef_df %>%
  group_by(term) %>%
  mutate(offset = as.numeric(factor(Profession)) * 0.1) %>%  # Adjust the offset multiplier as needed
  ungroup()

# Add the English profession column



coef_df$term[coef_df$term=="(Intercept)"] <- "Efficiency"
coef_df$term[coef_df$term=="AgglomerationDistPop"] <- "Agglomeration"

coef_df$term[coef_df$term=="lag.U"] <- "U.lag"
coef_df$term[coef_df$term=="lag.V"] <- "V.lag"


coef_df1 <- coef_df[coef_df$term %in% c("U","V","U.lag","V.lag","Efficiency", "lambda"),]

coef_df2 <- coef_df[!(coef_df$term %in% c("U","V","U.lag","V.lag","Efficiency","lambda")),]

library(viridis)
library(scales)
dark_viridis_colors <- viridis_pal(option = "G")(10)[3:8]

# Plot the coefficients with error bars and vertical offset

coef_df1$term <- factor(coef_df1$term, levels = c("lambda", "U.lag","V.lag", "U", "V",  "Efficiency" ))

plot_eff1 <- ggplot(coef_df1, aes(x = term, y = estimate, color = Profession))+
  geom_point(position = position_dodge(width = 0.8), size = 3) +
  geom_errorbar(aes(ymin = estimate - 1.96 * std.error, ymax = estimate +  1.96 *std.error),
                width = 0.2, size = 1, position = position_dodge(width = 0.8))+
  geom_vline(xintercept = 0, linetype = "solid", color = "black", size = 6) +
  coord_flip() +
  theme_minimal() +
  labs(
       x = "Coefficient Term",
       y = "Estimate",
       color = "Profession") +
theme(axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16),
        legend.position = "bottom",   legend.text = element_text(size = 14),
        plot.margin = margin(t = 10, r = 40, b = 10, l = 10),
  legend.title = element_text(size = 16),
  legend.title.position = "top",
  legend.box = "horizontal",
    # Adjust margins (top, right, bottom, left)
    panel.grid.major = element_line(color = "darkgray", size = 0.25),  # Darker major grid lines
    panel.grid.minor = element_line(color = "gray", size = 0.25),
      axis.title = element_text(size = 16),
    plot.title = element_text(size = 20)) +
  guides(color = guide_legend(title.position = "top", nrow = 6, byrow = TRUE)) +
#  scale_y_continuous(expand = expansion(mult = c(0.1, 0.1)),limits = c(-0.2, 0.2))   +
  scale_x_discrete(labels = function(x) sapply(x, add_line_breaks))+
  scale_color_manual(values=custom_disc_color) + guides(color = guide_legend(ncol = 2))



plot_eff2 <- ggplot(coef_df2, aes(color = Profession, y = estimate, x= term)) +
  geom_point(position = position_dodge(width = 0.8), size = 3)  +
  geom_errorbar(aes(ymin = estimate -  1.96 *std.error, ymax = estimate +  1.96 *std.error),
                width = 0.2, size = 1, position = position_dodge(width = 0.8)) +
  geom_vline(xintercept = 0, linetype = "solid", color = "black", size = 1.2)+
  coord_flip() +
  theme_minimal() +
  labs(
       x = "Coefficient Term",
       y = "Estimate",
       color = "Profession") +
theme(axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16),
          axis.title = element_text(size = 16),
        # Adjust margins (top, right, bottom, left)
    panel.grid.major = element_line(color = "darkgray", size = 0.25),  # Darker major grid lines
    panel.grid.minor = element_line(color = "gray", size = 0.25),
    plot.title = element_text(size = 20),
        legend.position = "bottom",   legend.text = element_text(size = 14),
      plot.margin = margin(t = 10, r = 40, b = 10, l = 10),
  legend.title = element_text(size = 16)) +
  guides(color = guide_legend(title.position = "top", nrow = 6, byrow = TRUE)) +
  scale_y_continuous(expand = expansion(mult = c(0.15, 0.15)),limits = c(-0.21, 0.21))   +
  scale_x_discrete(labels = function(x) sapply(x, add_line_breaks))+
  scale_color_manual(values=custom_disc_color)

plot_eff1
```




```{r}
library(ggplot2)
library(ggpubr)


# Arrange with shared legend
combined <- ggarrange(
  plot_eff1, plot_eff2,
  ncol = 2, nrow = 1,
  common.legend = TRUE,   # <- share legend
  legend = "bottom"       # <- put legend below
)

# Save as PNG
ggsave("./Outputs/Figures/plots_combined.png", combined,
       width = 12, height = 12, units = "in", dpi = 600)
```

